package com.duowan.cms.service.article.util;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import com.duowan.cms.common.util.IdManager;
import com.duowan.cms.common.util.NetUtil;
import com.duowan.cms.common.util.PathUtil;
import com.duowan.cms.common.util.StringUtil;
import com.duowan.cms.dto.article.ArticleImageDownloadInfo;
import com.duowan.cms.dto.article.ArticleInfo;
import com.duowan.cms.dto.channel.ChannelInfo;

/**
 * Handles the image URLs embedded in article content: images that are not
 * already hosted on a Duowan domain are queued for download and their
 * {@code src} URLs in the content are rewritten.
 * <p>Author: 黄均杨
 * <p>Completed: 2012-11-22
 */
public class ArticleImageDealer {

    private static final Logger logger = Logger.getLogger(ArticleImageDealer.class);

    /** Matches an {@code <img ... src=...>} occurrence; group 1 captures the src value. Compiled once and reused. */
    private static final Pattern IMG_SRC_PATTERN = Pattern.compile("<img.*?src=\"?\\s?([^\\s>\"]+)\"?", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Maximum number of download passes made over the still-failing images. */
    private static final int MAX_DOWNLOAD_ATTEMPTS = 3;

    /**
     * Processes the images referenced by the article content: every image URL
     * that needs fetching (see {@link #isNeedCatch(String)}) and has a picture
     * file extension is queued for background download, and all occurrences of
     * that URL in the content are replaced by the URL built by
     * {@code PathUtil.getPicUrlOnline}.
     *
     * @author yzq
     * @param articleInfo article whose content, channel info and id are read;
     *                    must not be null, and its channel info must not be null
     *                    when the content contains images to fetch
     * @return the content with the fetched image URLs rewritten, or an empty
     *         string when the article content is empty
     */
    public String dealWithImgInContent(ArticleInfo articleInfo) {

        String content = articleInfo.getContent();
        ChannelInfo channelInfo = articleInfo.getChannelInfo();
        Long articleId = articleInfo.getId();

        if (StringUtil.isEmpty(content)) {
            return "";
        }

        // The matcher scans the ORIGINAL content; rewriting the local
        // 'content' variable inside the loop does not disturb the scan.
        Matcher m = IMG_SRC_PATTERN.matcher(content);

        // Base id for picture file names; incremented once per queued image.
        long picId = IdManager.generateId();

        // Images to download (source url + local store path).
        List<ArticleImageDownloadInfo> downloadList = new ArrayList<ArticleImageDownloadInfo>();

        // URLs already rewritten. A URL that occurs several times is fully
        // replaced on its first match, so later matches must not queue it again.
        Set<String> handledUrls = new HashSet<String>();

        while (m.find()) {
            String url = m.group(1);
            if (StringUtil.isEmpty(url) || !this.isNeedCatch(url)) {
                continue;
            }
            String picExName = StringUtil.getFileNameExtension(url);
            if (!StringUtil.isPicExtension(picExName)) {
                continue;
            }
            if (!handledUrls.add(url)) {
                continue;
            }

            picId += 1;
            String picName = picId + "." + picExName;

            downloadList.add(new ArticleImageDownloadInfo(url, PathUtil.getPicPath(channelInfo.getPicFilePath(), articleId, picName)));

            // Literal replacement: the URL must not be interpreted as a regular
            // expression, nor the new URL as a regex replacement string.
            content = content.replace(url, PathUtil.getPicUrlOnline(channelInfo.getPicDomain(), articleId, picName));
        }

        // Downloading happens on a separate thread so the caller is not blocked.
        if (!downloadList.isEmpty()) {
            downloadImageThread(downloadList);
        }

        // Further content filtering / alt-text handling is intentionally not
        // done here (when storing the article); it is left to the step that
        // generates the article's static page.
        return content;
    }

    /**
     * Downloads the given images on a newly started thread. Images whose
     * download fails are retried in later passes, up to
     * {@link #MAX_DOWNLOAD_ATTEMPTS} passes in total.
     *
     * @author yzq
     * @param downloadList images to download; not modified by this method
     */
    private void downloadImageThread(final List<ArticleImageDownloadInfo> downloadList) {

        new Thread(new Runnable() {

            public void run() {
                // Images still to be downloaded in the current pass.
                List<ArticleImageDownloadInfo> pending = downloadList;

                for (int attempt = 1; pending != null && attempt <= MAX_DOWNLOAD_ATTEMPTS; attempt++) {
                    // Images that failed in this pass; stays null when all succeeded,
                    // which ends the loop.
                    List<ArticleImageDownloadInfo> failed = null;

                    for (ArticleImageDownloadInfo info : pending) {
                        boolean catchOk = NetUtil.downloadImage(info.getDownloadUrl(), info.getStorePath());
                        if (catchOk) {
                            logger.info("成功抓取图片, netUrl=" + info.getDownloadUrl() + "，存放地址：" + info.getStorePath());
                        } else {
                            if (failed == null) {
                                failed = new ArrayList<ArticleImageDownloadInfo>();
                            }
                            logger.info("第" + attempt + "次下载失败!url:" + info.getDownloadUrl());
                            failed.add(info);
                        }
                    }
                    pending = failed;
                }
            }
        }).start();
    }

    /**
     * Decides whether an image URL needs to be fetched.
     * <p>URLs on a few specific Duowan hosts (bbs, comment2 contribute, tougao,
     * tools) are fetched even though they are Duowan domains; any other URL
     * containing {@code duowan.com} or {@code dwstatic.com} is not fetched;
     * every remaining URL is fetched.
     *
     * @param url image URL; {@code null} is treated as "nothing to fetch"
     * @return {@code true} if the image should be fetched, {@code false} otherwise
     */
    public boolean isNeedCatch(String url) {

        if (url == null) {
            return false;
        }

        // These hosts belong to Duowan but their images still need fetching.
        if (url.contains("bbs.duowan.com")
                || url.contains("comment2.duowan.com/contribute")
                || url.contains("tougao.duowan.com")
                || url.contains("tools.duowan.com")) {
            return true;
        }

        // Other Duowan domains (dwstatic.com is the newer Duowan image domain)
        // do not need fetching.
        if (url.contains("duowan.com") || url.contains("dwstatic.com")) {
            return false;
        }

        return true;
    }

}
